import pandas as pd
import numbers as np

# Build a small integer Series whose labels are the integers 0..3.
s = pd.Series([1, 2, 3, 4], index=[0, 1, 2, 3])
# Show the whole Series, then the single value stored under label 0.
print(s, s.loc[0], sep="\n")

# Example table of people: one text column ("name") and one integer column ("age").
df = pd.DataFrame(
    {
        "name": ["张三", "李四", "王五"],
        "age": [1, 23, 24],
    }
)

print(df)

# The element-wise comparison yields a boolean Series, one flag per row ...
print(df["age"].gt(20))
# ... and that boolean Series can be used as a row filter (boolean indexing).
print(df.loc[df["age"].gt(20)])

# Per-column non-null counts, then the summary statistics table.
print(df.count())
print(df.describe())
print("=" * 10)
# Standard deviation of the age column on its own.
print(df["age"].std())

# Select the rows whose age is greater than the mean age.
print(df.loc[df["age"] > df["age"].mean()])

# Two columns moving in opposite directions: x counts up 1..6, y counts down 9..4.
df = pd.DataFrame({"x": list(range(1, 7)), "y": list(range(9, 3, -1))})

# Pairwise covariance matrix of the columns.
print(df.cov())

# Pairwise Pearson correlation matrix of the columns.
print(df.corr(method="pearson"))

# Load the comma-separated student records. The file has no header row,
# so the column names are supplied explicitly.
df = pd.read_csv(
    "students.txt",
    header=None,
    names=["id", "name", "age", "gender", "clazz"],
)

print(df)

# Rows whose age is greater than 22.
print(df.loc[df["age"] > 22])

# First five rows, last five rows, then the (rows, columns) shape.
print(df.head(5))
print(df.tail(5))
print(df.shape)

# Blank out every row whose age is greater than 23: all cells of those
# rows are overwritten with a missing value.
df[df["age"].gt(23)] = None

print(df)

# df.dropna() with its default how="any" would drop a row as soon as a
# single column in it is missing; how="all" below only drops rows in
# which every column is missing.
print(df.dropna(axis=0, how="all"))

# Count the non-null values of every column within each "clazz" group.
# The grouped result is computed once and reused for both outputs.
class_counts = df.groupby("clazz").count()
print(class_counts)

# Write the per-class count of non-null ids to count.txt without a header
# row. header=False is the documented boolean form of that option.
class_counts["id"].to_csv("count.txt", header=False)
